Check working directory

getwd()
## [1] "/Users/alexg/R files/hair_cortisol/skew-normal FINAL"

Load packages

library(readxl)
library(psych)
library(dlookr)
## Registered S3 methods overwritten by 'dlookr':
##   method          from  
##   plot.transform  scales
##   print.transform scales
## 
## Attaching package: 'dlookr'
## The following object is masked from 'package:psych':
## 
##     describe
## The following object is masked from 'package:base':
## 
##     transform
library(vtable)
## Loading required package: kableExtra
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
## 
##     group_rows
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(reshape)
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
## 
##     rename
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.22.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## 
## Attaching package: 'brms'
## The following object is masked from 'package:psych':
## 
##     cs
## The following object is masked from 'package:stats':
## 
##     ar
library(rethinking)
## Loading required package: cmdstanr
## This is cmdstanr version 0.8.0
## - CmdStanR documentation and vignettes: mc-stan.org/cmdstanr
## - CmdStan path: /Users/alexg/.cmdstan/cmdstan-2.36.0
## - CmdStan version: 2.36.0
## Loading required package: posterior
## This is posterior version 1.6.1
## 
## Attaching package: 'posterior'
## The following object is masked from 'package:dlookr':
## 
##     entropy
## The following objects are masked from 'package:stats':
## 
##     mad, sd, var
## The following objects are masked from 'package:base':
## 
##     %in%, match
## Loading required package: parallel
## rethinking (Version 2.42)
## 
## Attaching package: 'rethinking'
## The following objects are masked from 'package:brms':
## 
##     LOO, stancode, WAIC
## The following objects are masked from 'package:psych':
## 
##     logistic, logit, sim
## The following object is masked from 'package:stats':
## 
##     rstudent
library(loo)
## This is loo version 2.8.0
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
## 
## Attaching package: 'loo'
## The following object is masked from 'package:rethinking':
## 
##     compare
library(priorsense)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()      masks psych::%+%()
## ✖ ggplot2::alpha()    masks psych::alpha()
## ✖ tidyr::expand()     masks reshape::expand()
## ✖ tidyr::extract()    masks dlookr::extract()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ purrr::map()        masks rethinking::map()
## ✖ reshape::rename()   masks dplyr::rename()
## ✖ lubridate::stamp()  masks reshape::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(vioplot)
## Loading required package: sm
## Package 'sm', version 2.2-6.0: type help(sm) for summary information
## 
## Attaching package: 'sm'
## 
## The following object is masked from 'package:dlookr':
## 
##     binning
## 
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(bayesplot)
## This is bayesplot version 1.12.0
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
## 
## Attaching package: 'bayesplot'
## 
## The following object is masked from 'package:posterior':
## 
##     rhat
## 
## The following object is masked from 'package:brms':
## 
##     rhat
library(bayestestR)

Load data

df <- read_xlsx("hair_cort_dog_all.xlsx", col_types = c("text", "text",  
                               "text", "text", "text", "text",
                               "text", "numeric","text", "skip",
                               "numeric", "skip", "skip", 
                               "numeric", "skip"))
df <- as.data.frame(df)

INITIAL DATA PLOTTING AND EXPLORATION

Check characteristics of df

dim(df) # will tell you how many rows and columns the dataset has
## [1] 73 11
class(df) # will tell you what data structure has the dataset been assigned
## [1] "data.frame"

Explore the dataset to understand its structure.

head(df)
##   number   group visit season breed_group coat_colour    sex age comorbidity
## 1     c1 stopped    v0 winter         ret        dark   Male  43         yes
## 2     c2 stopped    v0 autumn         mix        dark   Male 105         yes
## 3     c3 stopped    v0 spring        ckcs         mix Female 117         yes
## 4     c4 stopped    v0 summer         ret        dark Female 108         yes
## 5     c5 stopped    v0 summer         ret        dark Female 110         yes
## 6     c6 stopped    v0 winter         mix       light Female 120         yes
##   fat_percent cortisol
## 1    52.21393 4.924220
## 2    38.52059 7.304202
## 3    46.94916 1.590000
## 4    44.46813 0.861570
## 5    39.59363 6.217317
## 6          NA 4.426785

1. Get summary stats for numeric data

# Keep only the numeric columns of df for the summary statistics
numeric_df <- Filter(is.numeric, df)
# dlookr is attached after psych, so describe() resolves to dlookr::describe()
# (which returns a tibble); call it by namespace to make that explicit
dlookr::describe(numeric_df)
## # A tibble: 3 × 26
##   described_variables     n    na  mean    sd se_mean   IQR skewness kurtosis
##   <chr>               <int> <int> <dbl> <dbl>   <dbl> <dbl>    <dbl>    <dbl>
## 1 age                    73     0 95.8  35.6     4.16 44      -0.104 -0.00589
## 2 fat_percent            55    18 40.5   7.82    1.05  7.82   -0.294  1.12   
## 3 cortisol               73     0  8.11 16.5     1.93  5.43    4.05  18.7    
## # ℹ 17 more variables: p00 <dbl>, p01 <dbl>, p05 <dbl>, p10 <dbl>, p20 <dbl>,
## #   p25 <dbl>, p30 <dbl>, p40 <dbl>, p50 <dbl>, p60 <dbl>, p70 <dbl>,
## #   p75 <dbl>, p80 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, p100 <dbl>

2. Check normality of all numeric variables

a. graphical assessment

plot_normality(numeric_df)

b. shapiro-wilk test

apply(numeric_df, 2, shapiro.test)
## $age
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.97361, p-value = 0.1288
## 
## 
## $fat_percent
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.97956, p-value = 0.4692
## 
## 
## $cortisol
## 
##  Shapiro-Wilk normality test
## 
## data:  newX[, i]
## W = 0.46269, p-value = 6.756e-15

c. repeat Q-Q plots with transformed data

i. log(cortisol)

qqnorm(df$cortisol)
qqline(df$cortisol, col = "red")

qqnorm(log(df$cortisol))
qqline(log(df$cortisol), col = "red")

ii Shapiro test for log cortisol

shapiro.test(log(df$cortisol))
## 
##  Shapiro-Wilk normality test
## 
## data:  log(df$cortisol)
## W = 0.94725, p-value = 0.004126

Check data numerically

summary(df$cortisol)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##   0.4141   1.4119   2.3331   8.1089   6.8455 104.6172
summary(log(df$cortisol))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8817  0.3449  0.8472  1.1816  1.9236  4.6503

a. Log-transform cortisol

df$lgCort <- log(df$cortisol)
summary(df$lgCort)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8817  0.3449  0.8472  1.1816  1.9236  4.6503

i. Visualise

hist(df$lgCort)

b. Create simple category name for breed and convert to factor

df$breed <- df$breed_group
df$breed <- factor(df$breed, levels = c("mix", "ckcs", "pug", "ret", "other"))
head(df$breed)
## [1] ret  mix  ckcs ret  ret  mix 
## Levels: mix ckcs pug ret other

c. Make light hair colour the reference category

df$coat_colour <- factor(df$coat_colour, levels = c("light", "mix", "dark"), ordered = FALSE)
head(df$coat_colour)
## [1] dark  dark  mix   dark  dark  light
## Levels: light mix dark

4. Generate data summary

sumtable(df)
Summary Statistics
Variable N Mean Std. Dev. Min Pctl. 25 Pctl. 75 Max
group 73
… completed 42 58%
… stopped 31 42%
visit 73
… v0 52 71%
… v1 21 29%
season 73
… autumn 21 29%
… spring 14 19%
… summer 22 30%
… winter 16 22%
breed_group 73
… ckcs 7 10%
… mix 16 22%
… other 26 36%
… pug 7 10%
… ret 17 23%
coat_colour 73
… light 27 37%
… mix 16 22%
… dark 30 41%
sex 73
… Female 43 59%
… Male 30 41%
age 73 96 36 16 73 117 182
comorbidity 73
… no 15 21%
… yes 58 79%
fat_percent 55 40 7.8 18 37 45 61
cortisol 73 8.1 16 0.41 1.4 6.8 105
lgCort 73 1.2 1.2 -0.88 0.34 1.9 4.7
breed 73
… mix 16 22%
… ckcs 7 10%
… pug 7 10%
… ret 17 23%
… other 26 36%

5. Visualise associations

a. Between lgCortisol and breed with a violin plot (vioplot package)

par(mfrow = c(1,1))
vioplot(lgCort ~ breed, col = "firebrick",
        data = df)

b. Between lgCortisol and breed with a stripchart

stripchart(lgCort ~ breed, vertical = TRUE, method = "jitter",
           col = "steelblue3", data = df, pch = 20)

c. between lgCortisol and coat_colour with a violin plot (vioplot package)

par(mfrow = c(1,1))
vioplot(lgCort ~ coat_colour, col = "firebrick",
        data = df)

d. between lgCortisol and coat_colour with a stripchart

stripchart(lgCort ~ coat_colour, vertical = TRUE, method = "jitter",
           col = "steelblue3", data = df, pch = 20)

STANDARDISE DATA FOR MODELLING

1. Standardise cortisol

# Standardise log-cortisol. Use the full column name: `df$lgC` only worked
# through `$` partial matching, which breaks silently if another column
# starting with "lgC" is added (and returns NULL on a tibble)
df$slgCort <- standardize(df$lgCort)
summary(df$slgCort)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.7079 -0.6925 -0.2768  0.0000  0.6142  2.8713

a. visualise standardised lgCort

hist(df$slgCort)

2. create dataset only containing complete data

df2 <- na.omit(df)

MODEL FOR THE EFFECT OF COAT COLOUR ON HAIR CORTISOL

1. Model code

# Coat colour is the exposure of interest, breed is adjusted for as a
# confounder, and visit enters as a varying intercept
model <- brm(slgCort ~ coat_colour + breed + (1 | visit),
             family = skew_normal(), data = df)

Rationale… the causal diagram implies that breed is a confounder for the effect of hair colour on hair cortisol. Therefore, it needs to be included in the model.

2. Check what priors need to be set

# List every parameter class of the model together with its default prior,
# to see which priors need to be set explicitly
default_prior(slgCort ~ coat_colour + breed + (1 | visit),
              family = skew_normal(),
              data = df)
##                    prior     class            coef group resp dpar nlpar lb ub
##             normal(0, 4)     alpha                                            
##                   (flat)         b                                            
##                   (flat)         b       breedckcs                            
##                   (flat)         b      breedother                            
##                   (flat)         b        breedpug                            
##                   (flat)         b        breedret                            
##                   (flat)         b coat_colourdark                            
##                   (flat)         b  coat_colourmix                            
##  student_t(3, -0.3, 2.5) Intercept                                            
##     student_t(3, 0, 2.5)        sd                                        0   
##     student_t(3, 0, 2.5)        sd                 visit                  0   
##     student_t(3, 0, 2.5)        sd       Intercept visit                  0   
##     student_t(3, 0, 2.5)     sigma                                        0   
##        source
##       default
##       default
##  (vectorized)
##  (vectorized)
##  (vectorized)
##  (vectorized)
##  (vectorized)
##  (vectorized)
##       default
##       default
##  (vectorized)
##  (vectorized)
##       default

Published information about associations with hair cortisol

  1. No published data about effects of breed, but an effect is plausible. However, it is unclear which breeds will differ and in which direction. Therefore, use a regularising prior but keep it neutral and broad.

  2. In one study, dogs with a light coat colour had greater log(hair cortisol) than those with a mixed or dark coat colour. The effect was small, e.g. 0.070 (mix) or 0.075 (dark). This can justify a prior of that magnitude, but keep it general and regularising. REF: Bowland. Front. Vet. Sci. 7:565346. doi: 10.3389/fvets.2020.565346

3. Set priors

# Set individual priors
# (the outcome slgCort is standardised, so these are on the SD scale)
# Intercept
prior_int <- set_prior("normal(0, 0.5)", class = "Intercept")
# Residual scale of the skew-normal likelihood
prior_sig <- set_prior("exponential(1)", class = "sigma")
# Generic prior for all regression coefficients without a specific prior
# (here: the breed contrasts)
prior_b <- set_prior("normal(0, 1)", class = "b")
# Coat-colour contrasts vs. the reference level (light): centred slightly
# below zero, but with SD 1 so the prior stays broad
prior_b_coat_m <- set_prior("normal(-0.070, 1)", class = "b", coef = "coat_colourmix")
prior_b_coat_d <- set_prior("normal(-0.075, 1)", class = "b", coef = "coat_colourdark")
# SD of the varying intercepts for visit
prior_sd <- set_prior("normal(0, 1)", class = "sd")
# Skewness parameter of the skew-normal family, centred on 4
prior_alpha <- set_prior("normal(4, 2)", class = "alpha")

# Combine the individual priors into the single object passed to brm(prior = )
priors <- c(prior_int, prior_sig, prior_b, prior_b_coat_m, prior_b_coat_d, prior_sd, prior_alpha)

4. Plot priors

a. Prior for intercept

x <- seq(-3, 3, length.out = 100)
y <- dnorm(x, mean = 0, sd = 0.5)
plot(y ~ x, type = "l")

b. Prior for sigma

x <- seq(0, 3, length.out = 100)
y <- dexp(x, rate = 1)
plot(y ~ x, type = "l")

b.ii. Alpha for skew normal distribution

Based on distribution of log normal hair cortisol, expect things to be skewed to the right. Try different levels of alpha for skew normal… and an alpha of 4 seems to be a good fit for the shape of the skew in the log hair cortisol for this dataset

x <- seq(-3, 5, length.out = 100)
y <- dskew_normal(x, mu = 0, sigma = 1, alpha = 4)
plot(y ~ x, type = "l")

c. Prior for coat_colour_mix beta

x <- seq(-3, 3, length.out = 100)
y <- dnorm(x, mean = -0.07, sd = 1)
plot(y ~ x, type = "l")

e. Prior for coat_colour_dark beta

x <- seq(-3, 3, length.out = 100)
y <- dnorm(x, mean = -0.075, sd = 1)
plot(y ~ x, type = "l")

5. Run model

Increased adapt_delta above the default of 0.8 (0.999 here, together with a small initial stepsize and max_treedepth = 15), as had divergent transitions

# Seed R's random number generator before fitting
set.seed(666)
# Fit the skew-normal multilevel model using the priors defined above
model <- brm(slgCort ~ coat_colour + breed + (1 | visit),
                   family = skew_normal(),
                   prior = priors,
                   data = df,
                   # conservative sampler settings, chosen because of divergent transitions
                   control=list(adapt_delta=0.999, stepsize = 0.001, max_treedepth =15),
                   # 8000 iterations per chain, of which the first 2000 are warmup
                   iter = 8000, warmup = 2000,
                   cores = 4,
                   # NOTE(review): presumably kept so loo(moment_match = TRUE) can be
                   # run on this fit later - confirm against the brms documentation
                   save_pars = save_pars(all =TRUE),
                   # also sample from the priors so prior_draws(model) works later
                   sample_prior = TRUE)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.5)’
## using SDK: ‘MacOSX15.5.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
##   679 | #include <cmath>
##       |          ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'

6. Get summary of model

summary(model)
##  Family: skew_normal 
##   Links: mu = identity; sigma = identity; alpha = identity 
## Formula: slgCort ~ coat_colour + breed + (1 | visit) 
##    Data: df (Number of observations: 73) 
##   Draws: 4 chains, each with iter = 8000; warmup = 2000; thin = 1;
##          total post-warmup draws = 24000
## 
## Multilevel Hyperparameters:
## ~visit (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.38      0.36     0.01     1.35 1.00     7766     8157
## 
## Regression Coefficients:
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           0.19      0.34    -0.49     0.86 1.00    10234    12473
## coat_colourmix     -0.50      0.29    -1.06     0.07 1.00    14208    15167
## coat_colourdark    -0.33      0.23    -0.78     0.13 1.00    15303    16240
## breedckcs           0.22      0.37    -0.54     0.91 1.00    15883    15971
## breedpug           -0.06      0.37    -0.80     0.65 1.00    14545    15719
## breedret           -0.04      0.29    -0.61     0.51 1.00    14158    16886
## breedother          0.10      0.25    -0.39     0.61 1.00    14064    15235
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     1.02      0.09     0.85     1.22 1.00    18453    16951
## alpha     4.44      1.44     1.97     7.59 1.00    16907    13996
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

7. MCMC diagnostics

plot(model)

Looking for hairy caterpillars

b. try a trank plot as well

mcmc_plot(model, type = 'rank_overlay')

8. Calculate 97% HPDI for coat_colour_mix

Usually better than the compatibility intervals given in the summary

# Posterior draws as a matrix with one column per parameter
draws <- as.matrix(model)
# Select the coefficient by name rather than by column position, so the
# interval stays correct if model terms are added, removed or reordered.
# prob = 0.97 gives a 97% highest posterior density interval.
HPDI(draws[, "b_coat_colourmix"], prob = 0.97)
##      |0.97      0.97| 
## -1.1269354  0.1290574

9. Calculate 97% HPDI for coat_colour_dark

Usually better than the compatibility intervals given in the summary

# Posterior draws as a matrix with one column per parameter
draws <- as.matrix(model)
# Select the coefficient by name rather than by column position, so the
# interval stays correct if model terms are added, removed or reordered.
# prob = 0.97 gives a 97% highest posterior density interval.
HPDI(draws[, "b_coat_colourdark"], prob = 0.97)
##      |0.97      0.97| 
## -0.8304157  0.1737404

10. Calculate R2 for model

bayes_R2(model, probs = c(0.015, 0.5, 0.985)) # 0.015, 0.5, 0.985 are the quantiles
##      Estimate  Est.Error       Q1.5        Q50     Q98.5
## R2 0.08756415 0.03930199 0.02095567 0.08274138 0.1862381
loo_R2(model, probs = c(0.015, 0.5, 0.985)) # 0.015, 0.5, 0.985 are the quantiles
## Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
##       Estimate  Est.Error       Q1.5         Q50       Q98.5
## R2 -0.09748751 0.05196706 -0.2278252 -0.09371635 0.001627205

CHECKS ON MODEL

1. Basic check of simulations based on posterior distribution, versus the real data distribution

checks whether actual data is similar to simulated data.

pp_check(model, ndraws = 100) 

2. Check some individual draws versus observed using pp_check

par(mfrow = c(1,1))
pp_check(model, type = "hist", ndraws = 11, binwidth = 0.25) # separate histograms of 11 MCMC draws vs actual data

3. Other pp_check graphs

pp_check(model, type = "error_hist", ndraws = 11) # separate histograms of errors for 11 draws
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

pp_check(model, type = "scatter_avg", ndraws = 100) # scatter plot

pp_check(model, type = "stat_2d") #  scatterplot of joint posteriors
## Using all posterior draws for ppc type 'stat_2d' by default.
## Note: in most cases the default test statistic 'mean' is too weak to detect anything of interest.

# PPC functions for predictive checks based on (approximate) leave-one-out (LOO) cross-validation
pp_check(model, type = "loo_pit_overlay", ndraws = 1000) 
## Warning: Found 2 observations with a pareto_k > 0.7 in model '.x1'. We
## recommend to set 'moment_match = TRUE' in order to perform moment matching for
## problematic observations.
## NOTE: The kernel density estimate assumes continuous observations and is not optimal for discrete observations.

5. Pairs plot

pairs(model)

PSIS LOO-CV to check model performance

loo_model <- loo(model, moment_match = TRUE)
loo_model
## 
## Computed from 24000 by 73 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo   -104.4  6.1
## p_loo         7.8  1.6
## looic       208.7 12.1
## ------
## MCSE of elpd_loo is 0.0.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.5, 1.2]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.

AUTOMATED PRIOR SENSITIVITY USING THE PRIOR SENSE PACKAGE

1. Sensitivity check

First, check the sensitivity of the posterior to power-scaling of the prior and of the likelihood, i.e. compare the base posterior with the posteriors resulting from power-scaling.

powerscale_sensitivity(model, variable = c("b_Intercept", "sigma", "b_breedckcs", "b_breedother", "b_breedpug", "b_breedret", "b_coat_colourmix", "b_coat_colourdark"))
## Sensitivity based on cjs_dist
## Prior selection: all priors
## Likelihood selection: all data
## 
##           variable prior likelihood diagnosis
##        b_Intercept 0.036      0.047         -
##              sigma 0.032      0.171         -
##        b_breedckcs 0.022      0.100         -
##       b_breedother 0.013      0.078         -
##         b_breedpug 0.022      0.077         -
##         b_breedret 0.013      0.080         -
##   b_coat_colourmix 0.030      0.105         -
##  b_coat_colourdark 0.023      0.103         -

2. Kernel density

powerscale_plot_dens(model, variable = c("b_Intercept", "sigma", "b_breedckcs", "b_breedother", "b_breedpug", "b_breedret", "b_coat_colourmix", "b_coat_colourdark"), facet_rows = "variable")

3. Empirical cumulative distribution functions

powerscale_plot_ecdf(model, variable = c("b_Intercept", "sigma", "b_breedckcs", "b_breedother", "b_breedpug", "b_breedret", "b_coat_colourmix", "b_coat_colourdark"), facet_rows = "variable")

4. Quantities

powerscale_plot_quantities(model, variable = c("b_Intercept", "sigma", "b_breedckcs", "b_breedother", "b_breedpug", "b_breedret", "b_coat_colourmix", "b_coat_colourdark"), facet_rows = "variable")

5. Now use bayestestR package to check priors are informative

check_prior(model, effects = "all")
##             Parameter Prior_Quality
## 1         b_Intercept   informative
## 2    b_coat_colourmix   informative
## 3   b_coat_colourdark   informative
## 4         b_breedckcs   informative
## 5          b_breedpug   informative
## 6          b_breedret   informative
## 7        b_breedother   informative
## 8 sd_visit__Intercept   informative

These values appear similar to what was set for the priors, so seems OK?

CHECK PRIOR PREDICTION LINES FROM FINAL MODEL

1. Obtain draws of priors from final model

prior <- prior_draws(model)
prior %>% glimpse()
## Rows: 24,000
## Columns: 10
## $ Intercept         <dbl> 0.96084670, 0.46491481, -0.26462236, 0.69449626, 0.3…
## $ b_coat_colourmix  <dbl> -0.28799106, 0.07935388, 0.18844207, 0.50087819, -0.…
## $ b_coat_colourdark <dbl> 0.94357599, -0.39174170, -0.27564698, -0.74237982, -…
## $ b_breedckcs       <dbl> 0.64610478, 0.53333003, 1.64518765, 0.67336808, -0.8…
## $ b_breedpug        <dbl> -1.42358243, 0.13827681, -0.95022571, -0.91958273, -…
## $ b_breedret        <dbl> -0.40108365, 1.44152867, -0.52672062, 0.41224951, 0.…
## $ b_breedother      <dbl> -0.51285359, 1.37645411, 0.87299377, 0.72529536, -1.…
## $ sigma             <dbl> 0.51944386, 0.44354540, 0.84031239, 0.35286383, 0.16…
## $ alpha             <dbl> 5.8251935, 3.5256696, 6.2386522, 3.3732770, 3.146630…
## $ sd_visit          <dbl> 0.09234196, 0.33199112, 0.11917467, 1.27825643, 0.66…

2. Plot prior prediction lines for coat_colour_mix with line plot

set.seed(5)

# Take 50 prior draws and plot, for each one, the implied line from the
# reference coat colour (light, a = 0) to the mixed coat colour (a = 1)
prior %>%
  slice_sample(n = 50) %>%
  rownames_to_column("draw") %>%
  expand_grid(a = c(0, 1)) %>%
  mutate(c = Intercept + b_coat_colourmix * a) %>%
  ggplot(aes(x = a, y = c)) +
  geom_line(aes(group = draw),
            color = "firebrick", alpha = .4) +
  geom_point(color = "firebrick", size = 2) +
  # the x-axis is the coat-colour contrast, not breed
  labs(x = "Coat colour (0 = light, 1 = mix)",
       y = "log(cort) (std)") +
  coord_cartesian(ylim = c(-3, 3)) +
  theme_bw() +
  theme(panel.grid = element_blank())

3. Plot prior prediction lines for coat_colour_dark with line plot

set.seed(5)

# Take 50 prior draws and plot, for each one, the implied line from the
# reference coat colour (light, a = 0) to the dark coat colour (a = 1)
prior %>%
  slice_sample(n = 50) %>%
  rownames_to_column("draw") %>%
  expand_grid(a = c(0, 1)) %>%
  mutate(c = Intercept + b_coat_colourdark * a) %>%
  ggplot(aes(x = a, y = c)) +
  geom_line(aes(group = draw),
            color = "firebrick", alpha = .4) +
  geom_point(color = "firebrick", size = 2) +
  # the x-axis is the coat-colour contrast, not breed
  labs(x = "Coat colour (0 = light, 1 = dark)",
       y = "log(cort) (std)") +
  coord_cartesian(ylim = c(-3, 3)) +
  theme_bw() +
  theme(panel.grid = element_blank())

CHECK PRIOR PREDICTIVE DISTRIBUTION

1. Prior Predictive Distribution

Can simulate data just on the priors. Fit model but only consider prior when fitting model. If this looks reasonable, it helps to confirm that your priors were reasonable

set.seed(666)
model_priors_only <- brm(slgCort ~ coat_colour + breed + (1 | visit),
                   family = skew_normal(),
                   prior = priors,
                   data = df,
                   sample_prior = "only")
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.5)’
## using SDK: ‘MacOSX15.5.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
##   679 | #include <cmath>
##       |          ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 8e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.8 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.021 seconds (Warm-up)
## Chain 1:                0.017 seconds (Sampling)
## Chain 1:                0.038 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 6e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.06 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.022 seconds (Warm-up)
## Chain 2:                0.016 seconds (Sampling)
## Chain 2:                0.038 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 7e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.07 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.023 seconds (Warm-up)
## Chain 3:                0.03 seconds (Sampling)
## Chain 3:                0.053 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 7e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.07 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 0.023 seconds (Warm-up)
## Chain 4:                0.016 seconds (Sampling)
## Chain 4:                0.039 seconds (Total)
## Chain 4:

2. Check predictions against priors

# Predictive check for the `model_priors_only` fit (created earlier in the
# file), drawing 100 simulated data sets.
pp_check(object = model_priors_only, ndraws = 100)

VARIANCE-COVARIANCE MATRIX

# Variance-covariance matrix of the posterior draws for every parameter from
# b_Intercept through sigma, rounded to three decimals.
round(
  cov(select(as_draws_df(model), b_Intercept:sigma)),
  digits = 3
)
## Warning: Dropping 'draws_df' class as required metadata was removed.
##                     b_Intercept b_coat_colourmix b_coat_colourdark b_breedckcs
## b_Intercept               0.113           -0.039            -0.026      -0.024
## b_coat_colourmix         -0.039            0.082             0.031      -0.015
## b_coat_colourdark        -0.026            0.031             0.053      -0.012
## b_breedckcs              -0.024           -0.015            -0.012       0.135
## b_breedpug               -0.053            0.036             0.021       0.023
## b_breedret               -0.043            0.022            -0.002       0.030
## b_breedother             -0.037            0.007            -0.005       0.032
## sd_visit__Intercept       0.002           -0.001            -0.002      -0.006
## sigma                     0.004            0.000             0.000       0.000
##                     b_breedpug b_breedret b_breedother sd_visit__Intercept
## b_Intercept             -0.053     -0.043       -0.037               0.002
## b_coat_colourmix         0.036      0.022        0.007              -0.001
## b_coat_colourdark        0.021     -0.002       -0.005              -0.002
## b_breedckcs              0.023      0.030        0.032              -0.006
## b_breedpug               0.137      0.041        0.035              -0.001
## b_breedret               0.041      0.081        0.037              -0.001
## b_breedother             0.035      0.037        0.063               0.000
## sd_visit__Intercept     -0.001     -0.001        0.000               0.132
## sigma                    0.000      0.000        0.000              -0.001
##                      sigma
## b_Intercept          0.004
## b_coat_colourmix     0.000
## b_coat_colourdark    0.000
## b_breedckcs          0.000
## b_breedpug           0.000
## b_breedret           0.000
## b_breedother         0.000
## sd_visit__Intercept -0.001
## sigma                0.009

MANUAL POSTERIOR PREDICTIVE DISTRIBUTION CHECKS

NB Uses posterior_predict

1. Posterior predictive distribution plots for coat colour

# Simulate replicated outcomes from the posterior predictive distribution
# (rows index posterior draws; each row is indexed against df below).
ppd <- posterior_predict(model)

# 2 x 2 grid: the observed data first, then three randomly chosen simulated
# data sets for a visual comparison against it.
par(mfrow = c(2, 2))
stripchart(slgCort ~ coat_colour, vertical = TRUE, method = "jitter",
           col = "steelblue3", data = df, pch = 20, main = "Observed")
for (panel in seq_len(3)) {
  # Extract the draw before plotting: putting the indexing expression straight
  # into the formula makes stripchart() use that expression as the axis label.
  ppd_draw <- ppd[sample.int(nrow(ppd), 1), ]
  stripchart(ppd_draw ~ coat_colour, vertical = TRUE, method = "jitter",
             col = "firebrick", data = df, pch = 20, main = "PPD",
             ylab = "slgCort")
}

ANALYSING THE POSTERIOR DISTRIBUTION

1a. Basic plot of conditional effects from model

# Plot the conditional effects of the fitted model; ask = FALSE avoids the
# interactive prompt between successive plots.
plot(x = conditional_effects(model), ask = FALSE)

1b. advanced plot of conditional effect of coat colour

# Conditional effect of coat colour only
ce <- conditional_effects(model, effects = "coat_colour")
# Select the plotting columns by name instead of by position, so the plot keeps
# working if the column layout of the conditional-effects data frame changes.
ce_df <- ce[[1]][c("coat_colour", "estimate__", "se__", "lower__", "upper__")]

# One colour per coat-colour level, in the row order of ce_df
level_cols <- c("#F8766D", "#00BFC4", "#7CAE00")

# Point estimate with its interval (lower__ / upper__) for each level
ggplot(ce_df, aes(x = coat_colour, y = estimate__, group = 1)) +
  geom_errorbar(aes(ymin = lower__, ymax = upper__),
                width = 0.1, colour = level_cols, linewidth = 1) +
  geom_point(shape = 21, size = 6, fill = level_cols) +
  theme_bw() +
  labs(title = "Conditional effect of coat colour on hair cortisol",
       x = "Coat colour",
       y = "Log Hair Cortisol (standardised)") +
  theme(axis.title.y = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 12, face = "bold"),
        title = element_text(size = 12, face = "bold"),
        plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(color = "grey25", size = 12),
        axis.text.y = element_text(color = "grey50", size = 10))

2. mcmc_plot of model

a.just parameters of beta variables

# Posterior summaries for the coat-colour and breed coefficients only
mcmc_plot(
  model,
  variable = c(
    "b_coat_colourmix", "b_coat_colourdark",
    "b_breedckcs", "b_breedother", "b_breedpug", "b_breedret"
  )
)

b. just coat colour versus prior

i. distributional

# Each coat-colour coefficient shown next to the draws from its prior
# (the prior_* variables)
mcmc_plot(
  model,
  variable = c(
    "b_coat_colourmix", "prior_b_coat_colourmix",
    "b_coat_colourdark", "prior_b_coat_colourdark"
  )
)

ii. density
# Density ("areas") version of the prior-vs-posterior comparison for the two
# coat-colour coefficients, with readable axis labels and a fixed row order.
mcmc_plot(
  model,
  variable = c(
    "b_coat_colourmix", "prior_b_coat_colourmix",
    "b_coat_colourdark", "prior_b_coat_colourdark"
  ),
  type = "areas"
) +
  theme_classic() +
  labs(
    title = "Prior vs posterior distribution for coat colour effect",
    y = "",
    x = "Possible parameter values"
  ) +
  # Replace the raw parameter names with descriptive labels; `limits` fixes
  # the order in which the rows are drawn.
  scale_y_discrete(
    labels = c(
      "prior_b_coat_colourmix" = "Prior for mixed",
      "b_coat_colourmix" = "Posterior for mixed",
      "prior_b_coat_colourdark" = "Prior for dark",
      "b_coat_colourdark" = "Posterior for dark"
    ),
    limits = c(
      "prior_b_coat_colourmix", "b_coat_colourmix",
      "prior_b_coat_colourdark", "b_coat_colourdark"
    )
  ) +
  theme(
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12, face = "bold"),
    title = element_text(size = 12, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(color = "grey50", size = 12),
    axis.text.y = element_text(color = "grey8", size = 12)
  )
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

c. all parameters except alpha and sd_visit__Intercept

# Intercept, residual sigma and all coat-colour / breed coefficients
mcmc_plot(
  model,
  variable = c(
    "b_Intercept", "sigma",
    "b_coat_colourmix", "b_coat_colourdark",
    "b_breedckcs", "b_breedother", "b_breedpug", "b_breedret"
  )
)

3. Plot all posterior distributions except alpha and sd_visit__Intercept

# Posterior draws as a matrix (one column per parameter)
posterior <- as.matrix(model)
# Density plot of the same parameter set as the mcmc_plot() in section 2.
# Uses "b_Intercept" so both plots show the same intercept parameter; the
# previous "Intercept" selected a different column of the draws matrix.
# NOTE(review): switch back if that other intercept column was intended.
mcmc_areas(
  posterior,
  pars = c(
    "b_Intercept", "sigma",
    "b_coat_colourmix", "b_coat_colourdark",
    "b_breedckcs", "b_breedother", "b_breedpug", "b_breedret"
  ),
  # arbitrary threshold for shading probability mass
  prob = 0.75
)

4a. plot posterior distribution for all betas

# Posterior draws as a matrix (one column per parameter)
posterior <- as.matrix(model)
# Densities for the coat-colour and breed coefficients; the shaded region
# covers 75% of the probability mass (an arbitrary threshold).
mcmc_areas(
  posterior,
  pars = c(
    "b_coat_colourmix", "b_coat_colourdark",
    "b_breedckcs", "b_breedother", "b_breedpug", "b_breedret"
  ),
  prob = 0.75
)

4b. Plot posterior distributions for coat colour only

# Posterior draws as a matrix (one column per parameter)
posterior <- as.matrix(model)
# Densities for the two coat-colour coefficients, shading 97% of the mass
# (an arbitrary threshold), with descriptive row labels.
mcmc_areas(
  posterior,
  pars = c("b_coat_colourmix", "b_coat_colourdark"),
  prob = 0.97
) +
  theme_classic() +
  labs(
    title = "Posterior distribution for coat colour effect",
    y = "Density distribution",
    x = "Possible parameter values"
  ) +
  scale_y_discrete(
    labels = c("b_coat_colourmix" = "Mixed", "b_coat_colourdark" = "Dark")
  ) +
  theme(
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12, face = "bold"),
    title = element_text(size = 12, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(color = "grey50", size = 12),
    axis.text.y = element_text(color = "grey8", size = 12)
  )
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

5. Describe the posterior visually

# Highest-density intervals of the posterior at several credible levels,
# plotted for all parameters including the intercept.
hdi_range <- hdi(model, ci = c(0.65, 0.70, 0.80, 0.89, 0.95))
# TRUE rather than T: T is an ordinary variable that can be reassigned.
plot(hdi_range, show_intercept = TRUE)

just coat colour

# Highest-density intervals at several credible levels, restricted to the two
# coat-colour coefficients.
hdi_range <- hdi(
  model,
  ci = c(0.65, 0.70, 0.80, 0.89, 0.95),
  parameters = c("b_coat_colourmix", "b_coat_colourdark")
)
# TRUE rather than T: T is an ordinary variable that can be reassigned.
plot(hdi_range, show_intercept = TRUE) +
  labs(
    title = "Posterior distribution for coat colour effect",
    y = "Density distribution",
    x = "Possible parameter values"
  ) +
  # Descriptive labels for the two rows; `limits` fixes their order.
  scale_y_discrete(
    labels = c("b_coat_colourmix" = "Mixed", "b_coat_colourdark" = "Dark"),
    limits = c("b_coat_colourmix", "b_coat_colourdark")
  ) +
  theme(
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12, face = "bold"),
    title = element_text(size = 12, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(color = "grey50", size = 12),
    axis.text.y = element_text(color = "grey8", size = 12)
  )
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

HYPOTHESIS TESTS

1. light vs. mix (from draws) is >0

# Posterior draws as a matrix (one column per parameter)
draws <- as.matrix(model)
# Address the coefficient by name: a positional index silently points at the
# wrong parameter if the model terms are ever added to or reordered.
# Share of posterior draws in which the mix coefficient is negative ...
mean(draws[, "b_coat_colourmix"] < 0)
## [1] 0.9577917
# ... and the share in which it is positive.
mean(draws[, "b_coat_colourmix"] > 0)
## [1] 0.04220833

Check the 97% credible interval (HPDI) for light vs. mix from the draws

# 97% highest posterior density interval for the mix coefficient, selected by
# name so the call does not depend on the column order of `draws`.
HPDI(draws[, "b_coat_colourmix"], prob = 0.97)
##      |0.97      0.97| 
## -1.1269354  0.1290574

2. light vs. dark (from draws) is >0

# Posterior draws as a matrix (one column per parameter)
draws <- as.matrix(model)
# Address the coefficient by name: a positional index silently points at the
# wrong parameter if the model terms are ever added to or reordered.
# Share of posterior draws in which the dark coefficient is negative ...
mean(draws[, "b_coat_colourdark"] < 0)
## [1] 0.9282917
# ... and the share in which it is positive.
mean(draws[, "b_coat_colourdark"] > 0)
## [1] 0.07170833

Check the 97% credible interval (HPDI) for light vs. dark from the draws

# 97% highest posterior density interval for the dark coefficient, selected by
# name so the call does not depend on the column order of `draws`.
HPDI(draws[, "b_coat_colourdark"], prob = 0.97)
##      |0.97      0.97| 
## -0.8304157  0.1737404

3. Visualising the posterior of a model using numerical and graphical methods

a. basic (one dog only)

# Stack three copies of the first row of df (the first dog) ...
new_data <- do.call(rbind, replicate(3, df[1, ], simplify = FALSE))
# ... and give each copy a different coat colour, leaving everything else equal
new_data$coat_colour <- c("light", "dark", "mix")
# Print the result to confirm that only coat_colour differs between rows
new_data
##   number   group visit season breed_group coat_colour  sex age comorbidity
## 1     c1 stopped    v0 winter         ret       light Male  43         yes
## 2     c1 stopped    v0 winter         ret        dark Male  43         yes
## 3     c1 stopped    v0 winter         ret         mix Male  43         yes
##   fat_percent cortisol   lgCort breed   slgCort
## 1    52.21393  4.92422 1.594166   ret 0.3415375
## 2    52.21393  4.92422 1.594166   ret 0.3415375
## 3    52.21393  4.92422 1.594166   ret 0.3415375
# Posterior predictive draws for the three rows of new_data. These are
# simulated outcomes (they include residual noise), not expected values.
# NOTE(review): if a contrast of expected values is wanted, posterior_epred()
# is the usual choice - confirm intent.
pred_means <- posterior_predict(model, newdata = new_data)

# Columns follow the row order of new_data: 1 = light, 2 = dark, 3 = mix.
# Fix: the two contrasts were previously swapped (LM used the dark column and
# DM used the mix column). Re-knit to refresh the printed summaries below.
differenceLM <- pred_means[, 1] - pred_means[, 3] # light minus mix
differenceDM <- pred_means[, 1] - pred_means[, 2] # light minus dark


# Lay the plots out on a 2 x 2 grid
par(mfrow = c(2, 2))

# differenceLM: average, distribution and 93% HPDI
mean(differenceLM)
## [1] 0.3495764
hist(differenceLM)
HPDI(differenceLM, prob = 0.93)
##     |0.93     0.93| 
## -2.376492  2.991906
# differenceDM: average, distribution and 93% HPDI
mean(differenceDM)
## [1] 0.5096972
hist(differenceDM)
HPDI(differenceDM, prob = 0.93)
##     |0.93     0.93| 
## -2.267171  3.163406

b.advanced (all dogs)

i. light vs mix

# Counterfactual data sets: every dog in df with coat colour forced to a
# single level, all other columns left as observed.
new_data1 <- df
new_data1$coat_colour <- "light"

new_data2 <- df
# Fix: this assignment previously overwrote new_data1 with "mix", leaving
# new_data2 at the observed colours, so the contrast was not light vs mix.
new_data2$coat_colour <- "mix"

# Posterior predictive draws under each scenario and their element-wise
# difference (light minus mix)
pred_nd1 <- posterior_predict(model, newdata = new_data1)
pred_nd2 <- posterior_predict(model, newdata = new_data2)
pred_diff <- pred_nd1 - pred_nd2
pred_diff <- data.frame(pred_diff)

# Mean difference for each column (dog), averaged over the posterior draws.
# colMeans() replaces apply(), which coerces the data frame on every call.
# NOTE(review): this vector has one value per dog, so an interval computed
# from it describes spread across dogs, not posterior uncertainty; averaging
# over dogs per draw (rowMeans) may be what is intended - confirm.
pred_diff_LM <- colMeans(pred_diff)
# View histogram of mean differences
hist(pred_diff_LM)

# Overall average of the per-dog mean differences
mean(pred_diff_LM)
## [1] -0.2536659

# 97% HPDI of the per-dog mean differences
HPDI(pred_diff_LM, prob = 0.97)
##       |0.97       0.97| 
## -0.51743424  0.01037813

ii. light vs dark

# Counterfactual data sets: every dog in df with coat colour forced to a
# single level. The "light" set is rebuilt here so this contrast does not
# depend on whatever new_data1 held after the previous section.
new_data1 <- df
new_data1$coat_colour <- "light"

new_data2 <- df
new_data2$coat_colour <- "dark"

# Posterior predictive draws under each scenario and their element-wise
# difference (light minus dark)
pred_nd1 <- posterior_predict(model, newdata = new_data1)
pred_nd2 <- posterior_predict(model, newdata = new_data2)
pred_diff <- pred_nd1 - pred_nd2
pred_diff <- data.frame(pred_diff)

# Mean difference for each column (dog), averaged over the posterior draws.
# colMeans() replaces apply(), which coerces the data frame on every call.
# NOTE(review): one value per dog, so an interval computed from this vector
# describes spread across dogs, not posterior uncertainty - confirm intent.
pred_diff_DM <- colMeans(pred_diff)
# View histogram of mean differences
hist(pred_diff_DM)

# Overall average of the per-dog mean differences
mean(pred_diff_DM)
## [1] -0.1641008
# 97% HPDI of the per-dog mean differences
HPDI(pred_diff_DM, prob = 0.97)
##      |0.97      0.97| 
## -0.1771495 -0.1446354

6. plot the counterfactual effect of “do coat colour” on slgCort

a. plot estimates and 95% credible intervals

set.seed(666)
# One row per coat colour, with visit and breed held at a single value
nd <- tibble(visit = "v0", coat_colour = c("light", "dark", "mix"), breed = "mix")

# predict() returns the Estimate and the Q2.5 / Q97.5 bounds for each row of
# nd; they are bound to nd so the plot can map coat colour on the x-axis.
p1 <- ggplot(
  bind_cols(data.frame(predict(model, resp = "slgCort", newdata = nd)), nd),
  aes(x = coat_colour, y = Estimate, ymin = Q2.5, ymax = Q97.5)
) +
  geom_linerange(linewidth = 1, color = "#F8766D", alpha = 3 / 5) +
  geom_point(size = 5, color = "#F8766D") +
  theme_bw() +
  labs(
    title = "Predicted effect of coat colour on hair cortisol",
    y = "Log hair cortisol (standardised)",
    x = "Coat colour"
  ) +
  theme(
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12, face = "bold"),
    title = element_text(size = 12, face = "bold"),
    plot.title = element_text(hjust = 0.5)
  ) +
  # Fixed y-range for the panel
  coord_cartesian(ylim = c(-2.5, 2.5))

plot(p1)

5. Make predictions of log cortisol for each dog and compare with actual data

# Expected-value predictions from the fitted model for the data it was fitted to
pred_slgCort <- posterior_epred(model)
# Average each column over its rows to get a single fitted value per column
av_pred_slgCort <- colMeans(pred_slgCort)
# Fitted values (y-axis) against the observed slgCort values in df (x-axis)
plot(av_pred_slgCort ~ df$slgCort)

Check if better fit if you allow SD to vary across coat colour

1. Set priors

# Individual prior specifications.
# Intercept and the generic prior for all regression coefficients:
prior_int <- set_prior(prior = "normal(0, 1.0)", class = "Intercept")
prior_b <- set_prior(prior = "normal(0, 1)", class = "b")
# Coefficient-specific priors for the two coat-colour contrasts:
prior_b_coat_m <- set_prior(
  prior = "normal(-0.070, 1)", class = "b", coef = "coat_colourmix"
)
prior_b_coat_d <- set_prior(
  prior = "normal(-0.075, 1)", class = "b", coef = "coat_colourdark"
)
# Group-level standard deviation and the skew-normal shape parameter:
prior_sd <- set_prior(prior = "normal(0, 1)", class = "sd")
prior_alpha <- set_prior(prior = "normal(4, 2)", class = "alpha")

# Bundle everything into the prior object passed to brm()
priors2 <- c(
  prior_int,
  prior_b,
  prior_b_coat_m,
  prior_b_coat_d,
  prior_sd,
  prior_alpha
)

2. Run model 2

Increased adapt_delta above the 0.8 default (0.999 here, together with a small initial stepsize and max_treedepth = 15), as the model had divergent transitions

set.seed(666)
# Model 2: same mean structure as before, but sigma is additionally modelled
# as a function of coat colour (second formula inside bf()).
model2 <- brm(
  formula = bf(
    slgCort ~ coat_colour + breed + (1 | visit),
    sigma ~ coat_colour
  ),
  data = df,
  family = skew_normal(),
  prior = priors2,
  iter = 8000,
  warmup = 2000,
  cores = 4,
  # Sampler settings used for this fit
  control = list(adapt_delta = 0.999, stepsize = 0.001, max_treedepth = 15),
  # Keep all parameter draws and also sample from the priors
  save_pars = save_pars(all = TRUE),
  sample_prior = TRUE
)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.0.13.5)’
## using SDK: ‘MacOSX15.5.sdk’
## clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
##   679 | #include <cmath>
##       |          ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'
## Found more than one class "stanfit" in cache; using the first, from namespace 'rethinking'
## Also defined by 'rstan'

3. get summary of model

# Print the parameter estimates and convergence diagnostics for model 2
summary(object = model2)
##  Family: skew_normal 
##   Links: mu = identity; sigma = log; alpha = identity 
## Formula: slgCort ~ coat_colour + breed + (1 | visit) 
##          sigma ~ coat_colour
##    Data: df (Number of observations: 73) 
##   Draws: 4 chains, each with iter = 8000; warmup = 2000; thin = 1;
##          total post-warmup draws = 24000
## 
## Multilevel Hyperparameters:
## ~visit (Number of levels: 2) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.41      0.40     0.01     1.52 1.00     8056    10037
## 
## Regression Coefficients:
##                       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                 0.20      0.42    -0.66     1.04 1.00     9308
## sigma_Intercept           0.01      0.15    -0.27     0.32 1.00    15896
## coat_colourmix           -0.36      0.36    -1.03     0.39 1.00    15556
## coat_colourdark          -0.37      0.26    -0.88     0.14 1.00    15683
## breedckcs                 0.19      0.37    -0.58     0.89 1.00    17541
## breedpug                 -0.06      0.37    -0.79     0.68 1.00    16540
## breedret                 -0.06      0.28    -0.60     0.48 1.00    14423
## breedother                0.08      0.26    -0.41     0.59 1.00    15227
## sigma_coat_colourmix      0.16      0.24    -0.30     0.64 1.00    15193
## sigma_coat_colourdark    -0.06      0.21    -0.47     0.35 1.00    14666
##                       Tail_ESS
## Intercept                10287
## sigma_Intercept          14670
## coat_colourmix           15787
## coat_colourdark          16443
## breedckcs                16697
## breedpug                 15206
## breedret                 15219
## breedother               13731
## sigma_coat_colourmix     16148
## sigma_coat_colourdark    16093
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## alpha     4.48      1.45     2.02     7.66 1.00    19208    14884
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

4. Try the PSIS LOO-CV procedure to check model performance

# PSIS-LOO for model 2 with moment matching; the outer parentheses print the
# result as well as storing it.
(loo_model2 <- loo(model2, moment_match = TRUE))
## 
## Computed from 24000 by 73 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo   -106.4  6.3
## p_loo         9.8  1.8
## looic       212.8 12.6
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.6, 1.2]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.

5. Compare looic for models 1 and 2

# Store the LOO criterion on both fits with identical settings so that the
# comparison is like-for-like. Without moment matching brms warned about
# observations with pareto_k > 0.7 in both models and recommended
# moment_match = TRUE, which is also what the separate loo() call on model2
# uses. overwrite = TRUE makes sure a criterion stored earlier without moment
# matching is recomputed rather than reused.
# NOTE(review): moment matching needs fits saved with save_pars(all = TRUE);
# model2 is, but confirm the same holds for `model` (fitted earlier).
# Re-knit to refresh the comparison table printed below.
model <- add_criterion(model, "loo", moment_match = TRUE, overwrite = TRUE)
model2 <- add_criterion(model2, "loo", moment_match = TRUE, overwrite = TRUE)
loo_compare(model, model2)
##        elpd_diff se_diff
## model   0.0       0.0   
## model2 -2.1       0.9

Model 1 is a better fit so keep this